In [1]:
Copied!
## import
import lamindb as ln
import scanpy as sc
from scprint import scPrint
from scdataloader.data import SimpleAnnDataset
from scdataloader import Preprocessor, Collator
from torch.utils.data import DataLoader
from scdataloader.utils import load_genes
import numpy as np
import anndata as ad
from scipy.sparse import csr_matrix
import pandas as pd
from scib_metrics.benchmark import Benchmarker
from lightning.pytorch import Trainer
%load_ext autoreload
%autoreload 2
import torch
torch.set_float32_matmul_precision('medium')
## import
import lamindb as ln
import scanpy as sc
from scprint import scPrint
from scdataloader.data import SimpleAnnDataset
from scdataloader import Preprocessor, Collator
from torch.utils.data import DataLoader
from scdataloader.utils import load_genes
import numpy as np
import anndata as ad
from scipy.sparse import csr_matrix
import pandas as pd
from scib_metrics.benchmark import Benchmarker
from lightning.pytorch import Trainer
%load_ext autoreload
%autoreload 2
import torch
torch.set_float32_matmul_precision('medium')
💡 lamindb instance: jkobject/scdataloader
2024-02-27 15:37:24,549:INFO - Created a temporary directory at /tmp/tmpn_kldnkg 2024-02-27 15:37:24,551:INFO - Writing /tmp/tmpn_kldnkg/_remote_module_non_scriptable.py 2024-02-27 15:37:24,551:INFO - Writing /tmp/tmpn_kldnkg/_remote_module_non_scriptable.py
preparing the model¶
In [48]:
Copied!
model.gene_encoder.embedding.weight
model.gene_encoder.embedding.weight
Out[48]:
Parameter containing:
tensor([[ 0.0037, 0.0200, -0.0034, ..., 0.0202, 0.0345, -0.0313],
[-0.0007, 0.0393, -0.0069, ..., 0.0281, 0.0400, -0.0190],
[-0.0175, 0.0621, -0.0132, ..., 0.0161, 0.0288, -0.0111],
...,
[-0.0217, 0.0621, -0.0146, ..., 0.0505, 0.0368, -0.0321],
[ 0.0173, 0.0463, -0.0083, ..., 0.0037, 0.0115, -0.0153],
[ 0.0525, 0.0525, -0.0402, ..., 0.0067, 0.0007, -0.0331]],
device='cuda:0')
In [61]:
Copied!
model.gene_encoder.embedding.weight[-5]
model.gene_encoder.embedding.weight[-5]
Out[61]:
tensor([-2.4019e-02, -1.1195e-02, 1.5377e-02, -1.5292e-02, -3.8597e-03,
8.3640e-04, -2.7142e-02, 1.9008e-02, -8.6371e-03, -1.5238e-02,
-1.6833e-02, 1.7925e-02, -2.9785e-03, -1.7331e-02, 1.6175e-02,
2.5416e-03, 2.2125e-02, -6.5159e-03, 6.1229e-03, -8.2164e-04,
1.5580e-03, -1.4043e-03, 5.7557e-03, 1.7301e-02, 1.7752e-02,
-6.6907e-03, 2.1567e-02, 2.5564e-02, -2.2414e-03, 2.4870e-03,
-7.5531e-03, -3.0830e-02, -7.8189e-03, 3.9240e-03, -1.8739e-02,
-1.1813e-02, -5.8855e-03, 1.1147e-02, 1.1393e-02, -1.3492e-03,
-1.2266e-02, -1.1268e-02, 1.9579e-02, -1.2649e-03, 3.9618e-02,
8.5675e-03, 6.7224e-03, 2.3654e-02, 3.3809e-02, -6.0911e-04,
-4.3977e-03, -3.0629e-03, -3.3121e-02, 1.3739e-02, -1.9411e-02,
1.8377e-02, 4.3772e-03, -3.6158e-02, 3.2731e-03, 2.9397e-02,
-8.0602e-03, 1.9550e-02, -9.7988e-03, -1.3292e-02, -6.1542e-03,
-4.6554e-03, -4.3562e-03, -7.5146e-03, 1.5390e-02, 3.6237e-02,
3.9053e-02, 4.0985e-03, 2.9162e-02, 5.7538e-03, 5.5284e-03,
-2.5665e-02, 2.4280e-02, 8.1235e-03, -2.2968e-02, 7.5519e-03,
7.4942e-03, 1.1842e-02, 2.1870e-02, -3.1771e-03, 1.5563e-02,
2.4190e-02, -5.0837e-02, -6.7125e-03, -2.7193e-02, 1.2300e-02,
1.9444e-02, -3.5305e-03, 4.0222e-02, 1.3722e-02, 1.2304e-02,
5.9529e-03, 9.8232e-03, 1.6579e-02, -1.2398e-02, 6.0710e-06,
2.0671e-02, 3.6629e-03, -1.4828e-02, -4.6302e-03, 4.1376e-02,
-1.6236e-02, 6.6806e-03, 2.6937e-02, 3.4397e-02, -1.3208e-02,
2.1328e-03, -2.2912e-02, 4.8714e-02, -1.6899e-03, -3.5980e-02,
-2.0907e-03, 3.5940e-02, 2.6971e-02, -1.5465e-02, 1.8554e-02,
-2.8039e-02, -2.6164e-02, 3.3551e-02, -3.4469e-02, 4.4606e-02,
-7.3048e-03, 1.6316e-02, 3.2521e-02], device='cuda:0')
In [118]:
Copied!
embeddings = pd.read_parquet('../../data/temp/embeddings.parquet').loc[model.genes]
sembeddings = torch.nn.AdaptiveAvgPool1d(128)(
torch.tensor(embeddings.values)
)
#model.gene_encoder.embedding.weight.data.copy_(torch.Tensor(sembeddings))
embeddings = pd.read_parquet('../../data/temp/embeddings.parquet').loc[model.genes]
sembeddings = torch.nn.AdaptiveAvgPool1d(128)(
torch.tensor(embeddings.values)
)
#model.gene_encoder.embedding.weight.data.copy_(torch.Tensor(sembeddings))
In [120]:
Copied!
model.genes
model.genes
Out[120]:
['ENSG00000000003', 'ENSG00000000005', 'ENSG00000000419', 'ENSG00000000457', 'ENSG00000000460', 'ENSG00000000938', 'ENSG00000000971', 'ENSG00000001036', 'ENSG00000001084', 'ENSG00000001167', 'ENSG00000001460', 'ENSG00000001461', 'ENSG00000001497', 'ENSG00000001561', 'ENSG00000001617', 'ENSG00000001626', 'ENSG00000001629', 'ENSG00000001630', 'ENSG00000001631', 'ENSG00000002016', 'ENSG00000002330', 'ENSG00000002549', 'ENSG00000002586', 'ENSG00000002587', 'ENSG00000002726', 'ENSG00000002745', 'ENSG00000002746', 'ENSG00000002822', 'ENSG00000002834', 'ENSG00000002919', 'ENSG00000002933', 'ENSG00000003056', 'ENSG00000003096', 'ENSG00000003137', 'ENSG00000003147', 'ENSG00000003249', 'ENSG00000003393', 'ENSG00000003400', 'ENSG00000003402', 'ENSG00000003436', 'ENSG00000003509', 'ENSG00000003756', 'ENSG00000003987', 'ENSG00000003989', 'ENSG00000004059', 'ENSG00000004139', 'ENSG00000004142', 'ENSG00000004399', 'ENSG00000004455', 'ENSG00000004468', 'ENSG00000004478', 'ENSG00000004487', 'ENSG00000004534', 'ENSG00000004660', 'ENSG00000004700', 'ENSG00000004766', 'ENSG00000004776', 'ENSG00000004777', 'ENSG00000004779', 'ENSG00000004799', 'ENSG00000004809', 'ENSG00000004838', 'ENSG00000004846', 'ENSG00000004848', 'ENSG00000004864', 'ENSG00000004866', 'ENSG00000004897', 'ENSG00000004939', 'ENSG00000004948', 'ENSG00000004961', 'ENSG00000004975', 'ENSG00000005001', 'ENSG00000005007', 'ENSG00000005020', 'ENSG00000005022', 'ENSG00000005059', 'ENSG00000005073', 'ENSG00000005075', 'ENSG00000005100', 'ENSG00000005102', 'ENSG00000005108', 'ENSG00000005156', 'ENSG00000005175', 'ENSG00000005187', 'ENSG00000005189', 'ENSG00000005194', 'ENSG00000005206', 'ENSG00000005238', 'ENSG00000005243', 'ENSG00000005249', 'ENSG00000005302', 'ENSG00000005339', 'ENSG00000005379', 'ENSG00000005381', 'ENSG00000005421', 'ENSG00000005436', 'ENSG00000005448', 'ENSG00000005469', 'ENSG00000005471', 'ENSG00000005483', 'ENSG00000005486', 'ENSG00000005513', 'ENSG00000005700', 'ENSG00000005801', 'ENSG00000005810', 'ENSG00000005812', 'ENSG00000005844', 'ENSG00000005882', 'ENSG00000005884', 'ENSG00000005889', 'ENSG00000005893', 'ENSG00000005961', 'ENSG00000005981', 'ENSG00000006007', 'ENSG00000006015', 'ENSG00000006016', 'ENSG00000006025', 'ENSG00000006042', 'ENSG00000006047', 'ENSG00000006059', 'ENSG00000006062', 'ENSG00000006071', 'ENSG00000006116', 'ENSG00000006118', 'ENSG00000006125', 'ENSG00000006128', 'ENSG00000006194', 'ENSG00000006210', 'ENSG00000006282', 'ENSG00000006283', 'ENSG00000006327', 'ENSG00000006377', 'ENSG00000006432', 'ENSG00000006451', 'ENSG00000006453', 'ENSG00000006459', 'ENSG00000006468', 'ENSG00000006530', 'ENSG00000006534', 'ENSG00000006555', 'ENSG00000006576', 'ENSG00000006606', 'ENSG00000006607', 'ENSG00000006611', 'ENSG00000006625', 'ENSG00000006634', 'ENSG00000006638', 'ENSG00000006652', 'ENSG00000006659', 'ENSG00000006695', 'ENSG00000006704', 'ENSG00000006712', 'ENSG00000006715', 'ENSG00000006740', 'ENSG00000006744', 'ENSG00000006747', 'ENSG00000006756', 'ENSG00000006757', 'ENSG00000006788', 'ENSG00000006831', 'ENSG00000006837', 'ENSG00000007001', 'ENSG00000007038', 'ENSG00000007047', 'ENSG00000007062', 'ENSG00000007080', 'ENSG00000007129', 'ENSG00000007168', 'ENSG00000007171', 'ENSG00000007174', 'ENSG00000007202', 'ENSG00000007216', 'ENSG00000007237', 'ENSG00000007255', 'ENSG00000007264', 'ENSG00000007306', 'ENSG00000007312', 'ENSG00000007314', 'ENSG00000007341', 'ENSG00000007350', 'ENSG00000007372', 'ENSG00000007376', 'ENSG00000007384', 'ENSG00000007392', 'ENSG00000007402', 'ENSG00000007516', 'ENSG00000007520', 'ENSG00000007541', 'ENSG00000007545', 'ENSG00000007866', 'ENSG00000007908', 'ENSG00000007923', 'ENSG00000007933', 'ENSG00000007944', 'ENSG00000007952', 'ENSG00000007968', 'ENSG00000008018', 'ENSG00000008056', 'ENSG00000008083', 'ENSG00000008086', 'ENSG00000008118', 'ENSG00000008128', 'ENSG00000008130', 'ENSG00000008196', 'ENSG00000008197', 'ENSG00000008226', 'ENSG00000008256', 'ENSG00000008277', 'ENSG00000008282', 'ENSG00000008283', 'ENSG00000008294', 'ENSG00000008300', 'ENSG00000008311', 'ENSG00000008323', 'ENSG00000008324', 'ENSG00000008382', 'ENSG00000008394', 'ENSG00000008405', 'ENSG00000008438', 'ENSG00000008441', 'ENSG00000008513', 'ENSG00000008516', 'ENSG00000008517', 'ENSG00000008710', 'ENSG00000008735', 'ENSG00000008838', 'ENSG00000008853', 'ENSG00000008869', 'ENSG00000008952', 'ENSG00000008988', 'ENSG00000009307', 'ENSG00000009335', 'ENSG00000009413', 'ENSG00000009694', 'ENSG00000009709', 'ENSG00000009724', 'ENSG00000009765', 'ENSG00000009780', 'ENSG00000009790', 'ENSG00000009830', 'ENSG00000009844', 'ENSG00000009950', 'ENSG00000009954', 'ENSG00000010017', 'ENSG00000010030', 'ENSG00000010072', 'ENSG00000010165', 'ENSG00000010219', 'ENSG00000010244', 'ENSG00000010256', 'ENSG00000010270', 'ENSG00000010278', 'ENSG00000010282', 'ENSG00000010292', 'ENSG00000010295', 'ENSG00000010310', 'ENSG00000010318', 'ENSG00000010319', 'ENSG00000010322', 'ENSG00000010327', 'ENSG00000010361', 'ENSG00000010379', 'ENSG00000010404', 'ENSG00000010438', 'ENSG00000010539', 'ENSG00000010610', 'ENSG00000010626', 'ENSG00000010671', 'ENSG00000010704', 'ENSG00000010803', 'ENSG00000010810', 'ENSG00000010818', 'ENSG00000010932', 'ENSG00000011007', 'ENSG00000011009', 'ENSG00000011021', 'ENSG00000011028', 'ENSG00000011052', 'ENSG00000011083', 'ENSG00000011105', 'ENSG00000011114', 'ENSG00000011132', 'ENSG00000011143', 'ENSG00000011198', 'ENSG00000011201', 'ENSG00000011243', 'ENSG00000011258', 'ENSG00000011260', 'ENSG00000011275', 'ENSG00000011295', 'ENSG00000011304', 'ENSG00000011332', 'ENSG00000011347', 'ENSG00000011376', 'ENSG00000011405', 'ENSG00000011422', 'ENSG00000011426', 'ENSG00000011451', 'ENSG00000011454', 'ENSG00000011465', 'ENSG00000011478', 'ENSG00000011485', 'ENSG00000011523', 'ENSG00000011566', 'ENSG00000011590', 'ENSG00000011600', 'ENSG00000011638', 'ENSG00000011677', 'ENSG00000012048', 'ENSG00000012061', 'ENSG00000012124', 'ENSG00000012171', 'ENSG00000012174', 'ENSG00000012211', 'ENSG00000012223', 'ENSG00000012232', 'ENSG00000012504', 'ENSG00000012660', 'ENSG00000012779', 'ENSG00000012817', 'ENSG00000012822', 'ENSG00000012963', 'ENSG00000012983', 'ENSG00000013016', 'ENSG00000013275', 'ENSG00000013288', 'ENSG00000013293', 'ENSG00000013297', 'ENSG00000013306', 'ENSG00000013364', 'ENSG00000013374', 'ENSG00000013375', 'ENSG00000013392', 'ENSG00000013441', 'ENSG00000013503', 'ENSG00000013523', 'ENSG00000013561', 'ENSG00000013563', 'ENSG00000013573', 'ENSG00000013583', 'ENSG00000013588', 'ENSG00000013619', 'ENSG00000013725', 'ENSG00000013810', 'ENSG00000014123', 'ENSG00000014138', 'ENSG00000014164', 'ENSG00000014216', 'ENSG00000014257', 'ENSG00000014641', 'ENSG00000014824', 'ENSG00000014914', 'ENSG00000014919', 'ENSG00000015133', 'ENSG00000015153', 'ENSG00000015171', 'ENSG00000015285', 'ENSG00000015413', 'ENSG00000015475', 'ENSG00000015479', 'ENSG00000015520', 'ENSG00000015532', 'ENSG00000015568', 'ENSG00000015592', 'ENSG00000015676', 'ENSG00000016082', 'ENSG00000016391', 'ENSG00000016402', 'ENSG00000016490', 'ENSG00000016602', 'ENSG00000016864', 'ENSG00000017260', 'ENSG00000017427', 'ENSG00000017483', 'ENSG00000017797', 'ENSG00000018189', 'ENSG00000018236', 'ENSG00000018280', 'ENSG00000018408', 'ENSG00000018510', 'ENSG00000018610', 'ENSG00000018625', 'ENSG00000018699', 'ENSG00000018869', 'ENSG00000019102', 'ENSG00000019144', 'ENSG00000019169', 'ENSG00000019186', 'ENSG00000019485', 'ENSG00000019505', 'ENSG00000019549', 'ENSG00000019582', 'ENSG00000019991', 'ENSG00000019995', 'ENSG00000020129', 'ENSG00000020181', 'ENSG00000020256', 'ENSG00000020426', 'ENSG00000020577', 'ENSG00000020633', 'ENSG00000020922', 'ENSG00000021300', 'ENSG00000021355', 'ENSG00000021461', 'ENSG00000021488', 'ENSG00000021574', 'ENSG00000021645', 'ENSG00000021762', 'ENSG00000021776', 'ENSG00000021826', 'ENSG00000021852', 'ENSG00000022267', 'ENSG00000022277', 'ENSG00000022355', 'ENSG00000022556', 'ENSG00000022567', 'ENSG00000022840', 'ENSG00000022976', 'ENSG00000023041', 'ENSG00000023171', 'ENSG00000023191', 'ENSG00000023228', 'ENSG00000023287', 'ENSG00000023318', 'ENSG00000023330', 'ENSG00000023445', 'ENSG00000023516', 'ENSG00000023572', 'ENSG00000023608', 'ENSG00000023697', 'ENSG00000023734', 'ENSG00000023839', 'ENSG00000023892', 'ENSG00000023902', 'ENSG00000023909', 'ENSG00000024048', 'ENSG00000024422', 'ENSG00000024526', 'ENSG00000024862', 'ENSG00000025039', 'ENSG00000025156', 'ENSG00000025293', 'ENSG00000025423', 'ENSG00000025434', 'ENSG00000025708', 'ENSG00000025770', 'ENSG00000025772', 'ENSG00000025796', 'ENSG00000025800', 'ENSG00000026025', 'ENSG00000026036', 'ENSG00000026103', 'ENSG00000026297', 'ENSG00000026508', 'ENSG00000026559', 'ENSG00000026652', 'ENSG00000026751', 'ENSG00000026950', 'ENSG00000027001', 'ENSG00000027075', 'ENSG00000027644', 'ENSG00000027697', 'ENSG00000027847', 'ENSG00000027869', 'ENSG00000028116', 'ENSG00000028137', 'ENSG00000028203', 'ENSG00000028277', 'ENSG00000028310', 'ENSG00000028528', 'ENSG00000028839', 'ENSG00000029153', 'ENSG00000029363', 'ENSG00000029364', 'ENSG00000029534', 'ENSG00000029559', 'ENSG00000029639', 'ENSG00000029725', 'ENSG00000029993', 'ENSG00000030066', 'ENSG00000030110', 'ENSG00000030304', 'ENSG00000030419', 'ENSG00000030582', 'ENSG00000031003', 'ENSG00000031081', 'ENSG00000031691', 'ENSG00000031698', 'ENSG00000031823', 'ENSG00000032219', 'ENSG00000032389', 'ENSG00000032444', 'ENSG00000032742', 'ENSG00000033011', 'ENSG00000033030', 'ENSG00000033050', 'ENSG00000033100', 'ENSG00000033122', 'ENSG00000033170', 'ENSG00000033178', 'ENSG00000033327', 'ENSG00000033627', 'ENSG00000033800', 'ENSG00000033867', 'ENSG00000034053', 'ENSG00000034152', 'ENSG00000034239', 'ENSG00000034510', 'ENSG00000034533', 'ENSG00000034677', 'ENSG00000034693', 'ENSG00000034713', 'ENSG00000034971', 'ENSG00000035115', 'ENSG00000035141', 'ENSG00000035403', 'ENSG00000035499', 'ENSG00000035664', 'ENSG00000035681', 'ENSG00000035687', 'ENSG00000035720', 'ENSG00000035862', 'ENSG00000035928', 'ENSG00000036054', 'ENSG00000036257', 'ENSG00000036448', 'ENSG00000036473', 'ENSG00000036530', 'ENSG00000036549', 'ENSG00000036565', 'ENSG00000036672', 'ENSG00000036828', 'ENSG00000037042', 'ENSG00000037241', 'ENSG00000037280', 'ENSG00000037474', 'ENSG00000037637', 'ENSG00000037749', 'ENSG00000037757', 'ENSG00000037897', 'ENSG00000037965', 'ENSG00000038002', 'ENSG00000038210', 'ENSG00000038219', 'ENSG00000038274', 'ENSG00000038295', 'ENSG00000038358', 'ENSG00000038382', 'ENSG00000038427', 'ENSG00000038532', 'ENSG00000038945', 'ENSG00000039068', 'ENSG00000039123', 'ENSG00000039139', 'ENSG00000039319', 'ENSG00000039523', 'ENSG00000039537', 'ENSG00000039560', 'ENSG00000039600', 'ENSG00000039650', 'ENSG00000039987', 'ENSG00000040199', 'ENSG00000040275', 'ENSG00000040341', 'ENSG00000040487', 'ENSG00000040531', 'ENSG00000040608', 'ENSG00000040633', 'ENSG00000040731', 'ENSG00000040933', 'ENSG00000041353', 'ENSG00000041357', 'ENSG00000041515', 'ENSG00000041802', 'ENSG00000041880', 'ENSG00000041982', 'ENSG00000041988', 'ENSG00000042062', 'ENSG00000042088', 'ENSG00000042286', 'ENSG00000042317', 'ENSG00000042429', 'ENSG00000042445', 'ENSG00000042493', 'ENSG00000042753', 'ENSG00000042781', 'ENSG00000042813', 'ENSG00000042832', 'ENSG00000042980', 'ENSG00000043039', 'ENSG00000043093', 'ENSG00000043143', 'ENSG00000043355', 'ENSG00000043462', 'ENSG00000043514', 'ENSG00000043591', 'ENSG00000044012', 'ENSG00000044090', 'ENSG00000044115', 'ENSG00000044446', 'ENSG00000044459', 'ENSG00000044524', 'ENSG00000044574', 'ENSG00000046604', 'ENSG00000046647', 'ENSG00000046651', 'ENSG00000046653', 'ENSG00000046774', 'ENSG00000046889', 'ENSG00000047056', 'ENSG00000047188', 'ENSG00000047230', 'ENSG00000047249', 'ENSG00000047315', 'ENSG00000047346', 'ENSG00000047365', 'ENSG00000047410', 'ENSG00000047457', 'ENSG00000047578', 'ENSG00000047579', 'ENSG00000047597', 'ENSG00000047617', 'ENSG00000047621', 'ENSG00000047634', 'ENSG00000047644', 'ENSG00000047648', 'ENSG00000047662', 'ENSG00000047849', 'ENSG00000047932', 'ENSG00000047936', 'ENSG00000048028', 'ENSG00000048052', 'ENSG00000048140', 'ENSG00000048162', 'ENSG00000048342', 'ENSG00000048392', 'ENSG00000048405', 'ENSG00000048462', 'ENSG00000048471', 'ENSG00000048540', 'ENSG00000048544', 'ENSG00000048545', 'ENSG00000048649', 'ENSG00000048707', 'ENSG00000048740', 'ENSG00000048828', 'ENSG00000048991', 'ENSG00000049089', 'ENSG00000049130', 'ENSG00000049167', 'ENSG00000049192', 'ENSG00000049239', 'ENSG00000049245', 'ENSG00000049246', 'ENSG00000049247', 'ENSG00000049249', 'ENSG00000049283', 'ENSG00000049323', 'ENSG00000049449', 'ENSG00000049540', 'ENSG00000049541', 'ENSG00000049618', 'ENSG00000049656', 'ENSG00000049759', 'ENSG00000049768', 'ENSG00000049769', 'ENSG00000049860', 'ENSG00000049883', 'ENSG00000050030', 'ENSG00000050130', 'ENSG00000050165', 'ENSG00000050327', 'ENSG00000050344', 'ENSG00000050393', 'ENSG00000050405', 'ENSG00000050426', 'ENSG00000050438', 'ENSG00000050555', 'ENSG00000050628', 'ENSG00000050730', 'ENSG00000050748', 'ENSG00000050767', 'ENSG00000050820', 'ENSG00000051009', 'ENSG00000051108', 'ENSG00000051128', 'ENSG00000051180', 'ENSG00000051341', 'ENSG00000051382', 'ENSG00000051523', 'ENSG00000051596', 'ENSG00000051620', 'ENSG00000051825', 'ENSG00000052126', 'ENSG00000052344', 'ENSG00000052723', 'ENSG00000052749', 'ENSG00000052795', 'ENSG00000052802', 'ENSG00000052841', 'ENSG00000052850', 'ENSG00000053108', 'ENSG00000053254', 'ENSG00000053328', 'ENSG00000053371', 'ENSG00000053372', 'ENSG00000053438', 'ENSG00000053501', 'ENSG00000053524', 'ENSG00000053702', 'ENSG00000053747', 'ENSG00000053770', 'ENSG00000053900', 'ENSG00000053918', 'ENSG00000054116', 'ENSG00000054118', 'ENSG00000054148', 'ENSG00000054179', 'ENSG00000054219', 'ENSG00000054267', 'ENSG00000054277', 'ENSG00000054282', 'ENSG00000054356', 'ENSG00000054392', 'ENSG00000054523', 'ENSG00000054598', 'ENSG00000054611', 'ENSG00000054654', 'ENSG00000054690', 'ENSG00000054793', 'ENSG00000054796', 'ENSG00000054803', 'ENSG00000054938', 'ENSG00000054965', 'ENSG00000054967', 'ENSG00000054983', 'ENSG00000055044', 'ENSG00000055070', 'ENSG00000055118', 'ENSG00000055130', 'ENSG00000055147', 'ENSG00000055163', 'ENSG00000055208', 'ENSG00000055211', 'ENSG00000055332', 'ENSG00000055483', 'ENSG00000055609', 'ENSG00000055732', 'ENSG00000055813', 'ENSG00000055917', 'ENSG00000055950', 'ENSG00000055955', 'ENSG00000055957', 'ENSG00000056050', 'ENSG00000056097', 'ENSG00000056277', 'ENSG00000056291', 'ENSG00000056487', 'ENSG00000056558', 'ENSG00000056586', 'ENSG00000056678', 'ENSG00000056736', 'ENSG00000056972', 'ENSG00000056998', 'ENSG00000057019', 'ENSG00000057149', 'ENSG00000057252', 'ENSG00000057294', 'ENSG00000057468', 'ENSG00000057593', 'ENSG00000057608', 'ENSG00000057657', 'ENSG00000057663', 'ENSG00000057704', 'ENSG00000057757', 'ENSG00000057935', 'ENSG00000058056', 'ENSG00000058063', 'ENSG00000058085', 'ENSG00000058091', 'ENSG00000058262', 'ENSG00000058272', 'ENSG00000058335', 'ENSG00000058404', 'ENSG00000058453', 'ENSG00000058600', 'ENSG00000058668', 'ENSG00000058673', 'ENSG00000058729', 'ENSG00000058799', 'ENSG00000058804', 'ENSG00000058866', 'ENSG00000059122', 'ENSG00000059145', 'ENSG00000059377', 'ENSG00000059378', 'ENSG00000059573', 'ENSG00000059588', 'ENSG00000059691', 'ENSG00000059728', 'ENSG00000059758', 'ENSG00000059769', 'ENSG00000059804', 'ENSG00000059915', 'ENSG00000060069', 'ENSG00000060138', 'ENSG00000060140', 'ENSG00000060237', 'ENSG00000060339', 'ENSG00000060491', 'ENSG00000060558', 'ENSG00000060566', 'ENSG00000060642', 'ENSG00000060656', 'ENSG00000060688', 'ENSG00000060709', 'ENSG00000060718', 'ENSG00000060749', 'ENSG00000060762', 'ENSG00000060971', 'ENSG00000060982', 'ENSG00000061273', 'ENSG00000061337', 'ENSG00000061455', 'ENSG00000061492', 'ENSG00000061656', 'ENSG00000061676', 'ENSG00000061794', 'ENSG00000061918', 'ENSG00000061936', 'ENSG00000061938', 'ENSG00000061987', 'ENSG00000062038', 'ENSG00000062096', 'ENSG00000062194', 'ENSG00000062282', 'ENSG00000062370', 'ENSG00000062485', 'ENSG00000062524', 'ENSG00000062582', 'ENSG00000062598', 'ENSG00000062650', 'ENSG00000062716', 'ENSG00000062725', 'ENSG00000062822', 'ENSG00000063015', 'ENSG00000063046', 'ENSG00000063127', 'ENSG00000063169', 'ENSG00000063176', 'ENSG00000063177', 'ENSG00000063180', 'ENSG00000063241', 'ENSG00000063244', 'ENSG00000063245', 'ENSG00000063322', 'ENSG00000063438', 'ENSG00000063515', 'ENSG00000063587', 'ENSG00000063601', 'ENSG00000063660', 'ENSG00000063761', 'ENSG00000063854', 'ENSG00000063978', 'ENSG00000064012', 'ENSG00000064042', 'ENSG00000064102', 'ENSG00000064115', 'ENSG00000064195', 'ENSG00000064199', 'ENSG00000064201', 'ENSG00000064205', 'ENSG00000064218', 'ENSG00000064225', 'ENSG00000064270', 'ENSG00000064300', 'ENSG00000064309', 'ENSG00000064313', 'ENSG00000064393', 'ENSG00000064419', 'ENSG00000064489', 'ENSG00000064490', 'ENSG00000064545', 'ENSG00000064547', 'ENSG00000064601', 'ENSG00000064607', 'ENSG00000064651', 'ENSG00000064652', 'ENSG00000064655', 'ENSG00000064666', 'ENSG00000064687', 'ENSG00000064692', 'ENSG00000064703', 'ENSG00000064726', 'ENSG00000064763', 'ENSG00000064787', 'ENSG00000064835', 'ENSG00000064886', 'ENSG00000064932', 'ENSG00000064933', 'ENSG00000064961', 'ENSG00000064989', 'ENSG00000064995', 'ENSG00000064999', 'ENSG00000065000', 'ENSG00000065029', 'ENSG00000065054', 'ENSG00000065057', 'ENSG00000065060', 'ENSG00000065135', 'ENSG00000065150', 'ENSG00000065154', 'ENSG00000065183', 'ENSG00000065243', 'ENSG00000065268', 'ENSG00000065308', 'ENSG00000065320', 'ENSG00000065325', 'ENSG00000065328', 'ENSG00000065357', 'ENSG00000065361', 'ENSG00000065371', 'ENSG00000065413', 'ENSG00000065427', 'ENSG00000065457', 'ENSG00000065485', 'ENSG00000065491', 'ENSG00000065518', 'ENSG00000065526', 'ENSG00000065534', 'ENSG00000065548', 'ENSG00000065559', 'ENSG00000065600', 'ENSG00000065609', 'ENSG00000065613', 'ENSG00000065615', 'ENSG00000065618', 'ENSG00000065621', 'ENSG00000065665', 'ENSG00000065675', 'ENSG00000065717', 'ENSG00000065802', 'ENSG00000065809', 'ENSG00000065833', 'ENSG00000065882', 'ENSG00000065883', 'ENSG00000065911', 'ENSG00000065923', 'ENSG00000065970', 'ENSG00000065978', 'ENSG00000065989', 'ENSG00000066027', 'ENSG00000066032', 'ENSG00000066044', 'ENSG00000066056', 'ENSG00000066084', 'ENSG00000066117', 'ENSG00000066135', 'ENSG00000066136', 'ENSG00000066185', 'ENSG00000066230', 'ENSG00000066248', 'ENSG00000066279', 'ENSG00000066294', 'ENSG00000066322', 'ENSG00000066336', 'ENSG00000066379', 'ENSG00000066382', 'ENSG00000066405', 'ENSG00000066422', 'ENSG00000066427', 'ENSG00000066455', 'ENSG00000066468', 'ENSG00000066557', 'ENSG00000066583', 'ENSG00000066629', 'ENSG00000066651', 'ENSG00000066654', 'ENSG00000066697', 'ENSG00000066735', 'ENSG00000066739', 'ENSG00000066777', 'ENSG00000066813', 'ENSG00000066827', 'ENSG00000066855', 'ENSG00000066923', 'ENSG00000066926', 'ENSG00000066933', 'ENSG00000067048', 'ENSG00000067057', 'ENSG00000067064', 'ENSG00000067066', 'ENSG00000067082', 'ENSG00000067113', 'ENSG00000067141', 'ENSG00000067167', 'ENSG00000067177', 'ENSG00000067182', 'ENSG00000067191', 'ENSG00000067208', ...]
In [2]:
Copied!
check = torch.load('../../data/tensorboard/scprint_test/iuealg88/checkpoints/epoch=0-step=13068.ckpt')
check = torch.load('../../data/tensorboard/scprint_test/iuealg88/checkpoints/epoch=0-step=13068.ckpt')
In [ ]:
Copied!
check
check
In [70]:
Copied!
for i in model.transformer.blocks:
print(i.mlp.fc1.weight.mean())
for i in model.transformer.blocks:
print(i.mlp.fc1.weight.mean())
tensor(0.0010, device='cuda:0', grad_fn=<MeanBackward0>) tensor(-0.0005, device='cuda:0', grad_fn=<MeanBackward0>) tensor(-0.0033, device='cuda:0', grad_fn=<MeanBackward0>) tensor(-0.0056, device='cuda:0', grad_fn=<MeanBackward0>) tensor(-0.0005, device='cuda:0', grad_fn=<MeanBackward0>) tensor(-0.0033, device='cuda:0', grad_fn=<MeanBackward0>) tensor(-0.0056, device='cuda:0', grad_fn=<MeanBackward0>)
In [87]:
Copied!
"gene_encoder.embedding.weight"
"gene_encoder.embedding.weight"
Out[87]:
'gene_encoder.embedding.weight'
In [72]:
Copied!
check.keys()
check.keys()
Out[72]:
dict_keys(['epoch', 'global_step', 'pytorch-lightning_version', 'state_dict', 'loops', 'callbacks', 'optimizer_states', 'lr_schedulers', 'MixedPrecisionPlugin', 'hparams_name', 'hyper_parameters'])
In [81]:
Copied!
check['state_dict']["gene_encoder.embedding.weight"][20]
check['state_dict']["gene_encoder.embedding.weight"][20]
Out[81]:
tensor([-0.0049, -0.0209, 0.0067, -0.0165, -0.0013, -0.0163, 0.0099, 0.0014,
0.0046, 0.0236, 0.0029, 0.0037, -0.0082, 0.0207, 0.0136, 0.0052,
0.0035, -0.0023, 0.0339, 0.0160, -0.0114, 0.0137, 0.0289, 0.0159,
-0.0129, 0.0246, -0.0119, -0.0132, 0.0026, 0.0144, 0.0142, 0.0020,
-0.0153, -0.0100, 0.0225, 0.0021, -0.0092, 0.0175, 0.0346, 0.0217,
-0.0244, 0.0340, 0.0142, 0.0009, 0.0054, 0.0105, -0.0213, -0.0084,
-0.0166, -0.0182, 0.0252, -0.0185, 0.0004, -0.0082, 0.0173, 0.0210,
0.0083, 0.0090, 0.0130, -0.0079, -0.0015, -0.0037, -0.0185, -0.0254,
-0.0385, 0.0195, 0.0159, 0.0178, 0.0072, 0.0212, -0.0006, -0.0103,
-0.0163, 0.0068, -0.0087, 0.0016, -0.0048, -0.0140, 0.0152, 0.0078,
-0.0148, 0.0151, -0.0207, 0.0151, -0.0344, 0.0186, 0.0099, 0.0351,
0.0254, 0.0444, 0.0213, -0.0064, -0.0488, -0.0292, 0.0073, -0.0276,
-0.0056, -0.0302, -0.0243, 0.0071, 0.0246, -0.0097, 0.0188, 0.0185,
-0.0401, 0.0421, -0.0024, 0.0047, -0.0073, -0.0259, 0.0066, 0.0022,
-0.0059, 0.0038, 0.0025, -0.0122, -0.0008, -0.0068, -0.0137, 0.0016,
-0.0092, -0.0056, -0.0142, -0.0115, -0.0185, 0.0196, 0.0171, -0.0008],
device='cuda:0')
In [82]:
Copied!
model.gene_encoder.embedding.weight[20]
model.gene_encoder.embedding.weight[20]
Out[82]:
tensor([-0.0049, -0.0209, 0.0067, -0.0165, -0.0013, -0.0163, 0.0099, 0.0014,
0.0046, 0.0236, 0.0029, 0.0037, -0.0082, 0.0207, 0.0136, 0.0052,
0.0035, -0.0023, 0.0339, 0.0160, -0.0114, 0.0137, 0.0289, 0.0159,
-0.0129, 0.0246, -0.0119, -0.0132, 0.0026, 0.0144, 0.0142, 0.0020,
-0.0153, -0.0100, 0.0225, 0.0021, -0.0092, 0.0175, 0.0346, 0.0217,
-0.0244, 0.0340, 0.0142, 0.0009, 0.0054, 0.0105, -0.0213, -0.0084,
-0.0166, -0.0182, 0.0252, -0.0185, 0.0004, -0.0082, 0.0173, 0.0210,
0.0083, 0.0090, 0.0130, -0.0079, -0.0015, -0.0037, -0.0185, -0.0254,
-0.0385, 0.0195, 0.0159, 0.0178, 0.0072, 0.0212, -0.0006, -0.0103,
-0.0163, 0.0068, -0.0087, 0.0016, -0.0048, -0.0140, 0.0152, 0.0078,
-0.0148, 0.0151, -0.0207, 0.0151, -0.0344, 0.0186, 0.0099, 0.0351,
0.0254, 0.0444, 0.0213, -0.0064, -0.0488, -0.0292, 0.0073, -0.0276,
-0.0056, -0.0302, -0.0243, 0.0071, 0.0246, -0.0097, 0.0188, 0.0185,
-0.0401, 0.0421, -0.0024, 0.0047, -0.0073, -0.0259, 0.0066, 0.0022,
-0.0059, 0.0038, 0.0025, -0.0122, -0.0008, -0.0068, -0.0137, 0.0016,
-0.0092, -0.0056, -0.0142, -0.0115, -0.0185, 0.0196, 0.0171, -0.0008],
device='cuda:0')
In [119]:
Copied!
sembeddings[20]
sembeddings[20]
Out[119]:
tensor([ 6.6611e-03, 3.2946e-02, 1.2063e-02, 1.3155e-02, -6.7928e-03,
3.0272e-02, 1.2692e-02, -5.3972e-03, 1.9142e-02, -2.3445e-03,
-2.7137e-03, 9.3737e-03, -4.7150e-03, 2.5329e-02, -2.0919e-02,
3.3505e-02, 1.8250e-02, 1.1739e-02, 2.7701e-02, -1.4182e-02,
7.5865e-03, -1.8503e-02, 4.2267e-03, -3.1029e-01, 1.1854e-02,
3.5789e-02, -1.7974e-02, -3.6900e-03, 3.4248e-02, -2.0346e-02,
2.4250e-02, -3.4916e-02, 1.7738e-02, 9.9715e-03, -1.4236e-02,
8.7822e-03, -1.9749e-03, 2.2516e-02, -5.4434e-03, 8.0317e-04,
-9.3315e-03, 1.0459e-02, 2.9404e-02, -2.3727e-02, -1.1082e-02,
1.0306e-02, 3.4731e-02, -1.6314e-02, 6.2507e-03, -1.3389e-02,
3.1514e-02, 3.6710e-04, -2.6332e-02, -8.5840e-04, 8.3556e-03,
-4.2249e-02, 2.4041e-03, 1.5509e-03, 5.7861e-02, 1.9756e-02,
6.6668e-03, -9.0279e-03, -1.2321e-02, 1.2319e-02, 3.5193e-02,
5.4685e-02, -2.1232e-02, 5.7735e-03, -4.9492e-03, 6.1288e-02,
-2.1482e-02, 7.8210e-03, 4.7659e-02, 1.3722e-01, 7.4602e-03,
3.4081e-03, 1.6790e-02, -4.4877e-02, -2.6832e-03, 1.5035e-02,
1.2683e-03, 9.2635e-03, -1.1464e-02, 2.5933e-02, 1.2168e-02,
1.8979e-02, 2.0139e-02, 5.9712e-02, 2.7476e-02, 1.0938e-02,
2.1327e-02, -1.3357e-02, -1.8981e-02, -2.1589e-03, 2.0913e-02,
1.9166e-02, -3.0249e-02, 2.7663e-02, 1.4311e-02, -4.3504e-03,
-8.7722e-03, -6.1208e-03, 5.8541e-03, 4.7430e-02, 1.9528e-02,
-2.6761e-03, 1.3365e-02, -2.0641e-02, 8.3008e-03, 9.7633e-03,
1.9733e-02, -1.3637e-02, -1.5420e-02, 4.2381e-02, 3.0103e-02,
2.0623e-02, -5.0322e-01, 8.5753e-03, -7.8186e-03, 4.5284e-02,
-3.0636e-03, 3.3920e-02, -2.1339e-02, 1.8148e-02, 8.7961e-03,
-1.2559e-02, 3.1327e-02, 6.8908e-03], dtype=torch.float64)
In [3]:
Copied!
model.genes[0]
model.genes[0]
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[3], line 1 ----> 1 model.genes[0] NameError: name 'model' is not defined
In [4]:
Copied!
#model = scPrint.load_from_checkpoint('../../data/tensorboard/scprint_test/urxxwu28/checkpoints/epoch=0-step=20000.ckpt') #chocolate-surf-8
model = scPrint.load_from_checkpoint('../../data/tensorboard/scprint_test/iuealg88/checkpoints/epoch=0-step=13068.ckpt', precpt_gene_emb = '../../data/temp/embeddings.parquet') #lambent night
#model = scPrint.load_from_checkpoint('../../data/tensorboard/scprint_test/urxxwu28/checkpoints/epoch=0-step=20000.ckpt') #chocolate-surf-8
model = scPrint.load_from_checkpoint('../../data/tensorboard/scprint_test/iuealg88/checkpoints/epoch=0-step=13068.ckpt', precpt_gene_emb = '../../data/temp/embeddings.parquet') #lambent night
scPrint(
(gene_encoder): GeneEncoder(
(embedding): Embedding(33890, 128)
(enc_norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(expr_encoder): ContinuousValueEncoder(
(linear1): Linear(in_features=1, out_features=128, bias=True)
(activation): ReLU()
(norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(pos_encoder): PositionalEncoding(
(dropout): Dropout(p=0.1, inplace=False)
)
(label_encoder): CategoryValueEncoder(
(embedding): Embedding(8, 128)
(enc_norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
)
(time_encoder): ContinuousValueEncoder(
(linear1): Linear(in_features=1, out_features=128, bias=True)
(activation): ReLU()
(norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(transformer): FlashTransformerEncoder(
(blocks): ModuleList(
(0-3): 4 x Block(
(mixer): MHA(
(Wqkv): Linear(in_features=128, out_features=384, bias=True)
(inner_attn): FlashSelfAttention()
(inner_cross_attn): FlashCrossAttention(
(drop): Dropout(p=0.1, inplace=False)
)
(out_proj): Linear(in_features=128, out_features=128, bias=True)
)
(dropout1): Dropout(p=0.1, inplace=False)
(drop_path1): StochasticDepth(p=0.0, mode=row)
(norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=128, out_features=512, bias=True)
(activation): GELU(approximate='none')
(fc2): Linear(in_features=512, out_features=128, bias=True)
)
(dropout2): Dropout(p=0.1, inplace=False)
(drop_path2): StochasticDepth(p=0.0, mode=row)
(norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
)
)
(dropout): Dropout(p=0.1, inplace=False)
(drop_path): StochasticDepth(p=0.0, mode=row)
(norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
)
(expr_decoder): ExprDecoder(
(fc): Sequential(
(0): Linear(in_features=128, out_features=128, bias=True)
(1): LeakyReLU(negative_slope=0.01)
(2): Dropout(p=0.1, inplace=False)
)
(finalfc): Sequential(
(0): Linear(in_features=128, out_features=128, bias=True)
(1): LeakyReLU(negative_slope=0.01)
)
(depth_encoder): Sequential(
(0): ContinuousValueEncoder(
(linear1): Linear(in_features=1, out_features=128, bias=True)
(activation): ReLU()
(norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(1): Linear(in_features=128, out_features=128, bias=True)
(2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
(3): LeakyReLU(negative_slope=0.01)
(4): Dropout(p=0.1, inplace=False)
)
(pred_var_zero): Linear(in_features=128, out_features=3, bias=True)
(depth_fc): Sequential(
(0): Linear(in_features=128, out_features=1, bias=True)
(1): ReLU()
)
)
(cls_decoders): ModuleDict(
(cell_type_ontology_term_id): ClsDecoder(
(decoder): Sequential(
(0): Linear(in_features=128, out_features=128, bias=True)
(1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
(2): ReLU()
(3): Dropout(p=0.1, inplace=False)
)
(out_layer): Linear(in_features=128, out_features=190, bias=True)
)
(disease_ontology_term_id): ClsDecoder(
(decoder): Sequential(
(0): Linear(in_features=128, out_features=128, bias=True)
(1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
(2): ReLU()
(3): Dropout(p=0.1, inplace=False)
)
(out_layer): Linear(in_features=128, out_features=18, bias=True)
)
(assay_ontology_term_id): ClsDecoder(
(decoder): Sequential(
(0): Linear(in_features=128, out_features=128, bias=True)
(1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
(2): ReLU()
(3): Dropout(p=0.1, inplace=False)
)
(out_layer): Linear(in_features=128, out_features=11, bias=True)
)
(self_reported_ethnicity_ontology_term_id): ClsDecoder(
(decoder): Sequential(
(0): Linear(in_features=128, out_features=128, bias=True)
(1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
(2): ReLU()
(3): Dropout(p=0.1, inplace=False)
)
(out_layer): Linear(in_features=128, out_features=7, bias=True)
)
(sex_ontology_term_id): ClsDecoder(
(decoder): Sequential(
(0): Linear(in_features=128, out_features=128, bias=True)
(1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
(2): ReLU()
(3): Dropout(p=0.1, inplace=False)
)
(out_layer): Linear(in_features=128, out_features=2, bias=True)
)
(organism_ontology_term_id): ClsDecoder(
(decoder): Sequential(
(0): Linear(in_features=128, out_features=128, bias=True)
(1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
(2): ReLU()
(3): Dropout(p=0.1, inplace=False)
)
(out_layer): Linear(in_features=128, out_features=2, bias=True)
)
)
(mvc_decoder): MVCDecoder(
(depth_encoder): Sequential(
(0): ContinuousValueEncoder(
(linear1): Linear(in_features=1, out_features=128, bias=True)
(activation): ReLU()
(norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(1): Linear(in_features=128, out_features=128, bias=True)
(2): LeakyReLU(negative_slope=0.01)
)
(depth_fc): Sequential(
(0): Linear(in_features=128, out_features=128, bias=True)
(1): LeakyReLU(negative_slope=0.01)
(2): Linear(in_features=128, out_features=1, bias=True)
(3): ReLU()
)
(gene2query): Linear(in_features=128, out_features=128, bias=True)
(query_activation): Sigmoid()
(pred_var_zero): Linear(in_features=128, out_features=384, bias=False)
)
)
--------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) Cell In[4], line 2 1 #model = scPrint.load_from_checkpoint('../../data/tensorboard/scprint_test/urxxwu28/checkpoints/epoch=0-step=20000.ckpt') #chocolate-surf-8 ----> 2 model = scPrint.load_from_checkpoint('../../data/tensorboard/scprint_test/iuealg88/checkpoints/epoch=0-step=13068.ckpt', precpt_gene_emb = '../../data/temp/embeddings.parquet') #lambent night File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/lightning/pytorch/core/module.py:1543, in LightningModule.load_from_checkpoint(cls, checkpoint_path, map_location, hparams_file, strict, **kwargs) 1463 @classmethod 1464 def load_from_checkpoint( 1465 cls, (...) 1470 **kwargs: Any, 1471 ) -> Self: 1472 r""" 1473 Primary way of loading a model from a checkpoint. When Lightning saves a checkpoint 1474 it stores the arguments passed to ``__init__`` in the checkpoint under ``"hyper_parameters"``. (...) 1541 y_hat = pretrained_model(x) 1542 """ -> 1543 loaded = _load_from_checkpoint( 1544 cls, 1545 checkpoint_path, 1546 map_location, 1547 hparams_file, 1548 strict, 1549 **kwargs, 1550 ) 1551 return cast(Self, loaded) File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/lightning/pytorch/core/saving.py:91, in _load_from_checkpoint(cls, checkpoint_path, map_location, hparams_file, strict, **kwargs) 89 return _load_state(cls, checkpoint, **kwargs) 90 if issubclass(cls, pl.LightningModule): ---> 91 model = _load_state(cls, checkpoint, strict=strict, **kwargs) 92 state_dict = checkpoint["state_dict"] 93 if not state_dict: File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/lightning/pytorch/core/saving.py:157, in _load_state(cls, checkpoint, strict, **cls_kwargs_new) 155 # load the state_dict on the model automatically 156 assert strict is not None --> 157 keys = obj.load_state_dict(checkpoint["state_dict"], strict=strict) 159 if not strict: 160 if keys.missing_keys: File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/torch/nn/modules/module.py:2041, in Module.load_state_dict(self, state_dict, strict) 2036 error_msgs.insert( 2037 0, 'Missing key(s) in state_dict: {}. '.format( 2038 ', '.join('"{}"'.format(k) for k in missing_keys))) 2040 if len(error_msgs) > 0: -> 2041 raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format( 2042 self.__class__.__name__, "\n\t".join(error_msgs))) 2043 return _IncompatibleKeys(missing_keys, unexpected_keys) RuntimeError: Error(s) in loading state_dict for scPrint: Missing key(s) in state_dict: "expr_decoder.depth_encoder.2.weight", "expr_decoder.depth_encoder.2.bias", "cls_decoders.cell_type_ontology_term_id.decoder.0.weight", "cls_decoders.cell_type_ontology_term_id.decoder.0.bias", "cls_decoders.cell_type_ontology_term_id.decoder.1.weight", "cls_decoders.cell_type_ontology_term_id.decoder.1.bias", "cls_decoders.disease_ontology_term_id.decoder.0.weight", "cls_decoders.disease_ontology_term_id.decoder.0.bias", "cls_decoders.disease_ontology_term_id.decoder.1.weight", "cls_decoders.disease_ontology_term_id.decoder.1.bias", "cls_decoders.assay_ontology_term_id.decoder.0.weight", "cls_decoders.assay_ontology_term_id.decoder.0.bias", "cls_decoders.assay_ontology_term_id.decoder.1.weight", "cls_decoders.assay_ontology_term_id.decoder.1.bias", "cls_decoders.self_reported_ethnicity_ontology_term_id.decoder.0.weight", "cls_decoders.self_reported_ethnicity_ontology_term_id.decoder.0.bias", "cls_decoders.self_reported_ethnicity_ontology_term_id.decoder.1.weight", "cls_decoders.self_reported_ethnicity_ontology_term_id.decoder.1.bias", "cls_decoders.sex_ontology_term_id.decoder.0.weight", "cls_decoders.sex_ontology_term_id.decoder.0.bias", "cls_decoders.sex_ontology_term_id.decoder.1.weight", "cls_decoders.sex_ontology_term_id.decoder.1.bias", "cls_decoders.organism_ontology_term_id.decoder.0.weight", "cls_decoders.organism_ontology_term_id.decoder.0.bias", "cls_decoders.organism_ontology_term_id.decoder.1.weight", "cls_decoders.organism_ontology_term_id.decoder.1.bias". Unexpected key(s) in state_dict: "cls_decoders.cell_type_ontology_term_id._decoder.0.weight", "cls_decoders.cell_type_ontology_term_id._decoder.0.bias", "cls_decoders.cell_type_ontology_term_id._decoder.1.weight", "cls_decoders.cell_type_ontology_term_id._decoder.1.bias", "cls_decoders.disease_ontology_term_id._decoder.0.weight", "cls_decoders.disease_ontology_term_id._decoder.0.bias", "cls_decoders.disease_ontology_term_id._decoder.1.weight", "cls_decoders.disease_ontology_term_id._decoder.1.bias", "cls_decoders.assay_ontology_term_id._decoder.0.weight", "cls_decoders.assay_ontology_term_id._decoder.0.bias", "cls_decoders.assay_ontology_term_id._decoder.1.weight", "cls_decoders.assay_ontology_term_id._decoder.1.bias", "cls_decoders.self_reported_ethnicity_ontology_term_id._decoder.0.weight", "cls_decoders.self_reported_ethnicity_ontology_term_id._decoder.0.bias", "cls_decoders.self_reported_ethnicity_ontology_term_id._decoder.1.weight", "cls_decoders.self_reported_ethnicity_ontology_term_id._decoder.1.bias", "cls_decoders.sex_ontology_term_id._decoder.0.weight", "cls_decoders.sex_ontology_term_id._decoder.0.bias", "cls_decoders.sex_ontology_term_id._decoder.1.weight", "cls_decoders.sex_ontology_term_id._decoder.1.bias", "cls_decoders.organism_ontology_term_id._decoder.0.weight", "cls_decoders.organism_ontology_term_id._decoder.0.bias", "cls_decoders.organism_ontology_term_id._decoder.1.weight", "cls_decoders.organism_ontology_term_id._decoder.1.bias".
In [4]:
Copied!
ensembl = load_genes(['NCBITaxon:9606'])
ensembl['ensembl_gene_id'] = ensembl.index
ensembl = load_genes(['NCBITaxon:9606'])
ensembl['ensembl_gene_id'] = ensembl.index
Loading the data¶
In [ ]:
Copied!
adata.sc.read(
only dataset dropped
)
adata.sc.read(
only dataset dropped
)
In [110]:
Copied!
adata = sc.read(
"data/pancreas_atlas.h5ad",
backup_url="https://figshare.com/ndownloader/files/24539828",
)
adata = sc.read(
"data/pancreas_atlas.h5ad",
backup_url="https://figshare.com/ndownloader/files/24539828",
)
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/anndata/__init__.py:51: FutureWarning: `anndata.read` is deprecated, use `anndata.read_h5ad` instead. `ad.read` will be removed in mid 2024. warnings.warn(
In [93]:
Copied!
adata.X.sum(0)
adata.X.sum(0)
Out[93]:
array([ 6171.3677, 17159.143 , 2168.623 , ..., 17207.316 , 11868.23 ,
11490.851 ], dtype=float32)
In [19]:
Copied!
#adata = sc.read(
# "data/lung_atlas.h5ad",
# backup_url="https://figshare.com/ndownloader/files/24539942",
#)
#adata = sc.read(
# "data/lung_atlas.h5ad",
# backup_url="https://figshare.com/ndownloader/files/24539942",
#)
In [20]:
Copied!
#adata = sc.read_h5ad('/home/ml4ig1/scprint/.lamindb/BljRloq1xjcxRNDpejzI.h5ad')
#adata = sc.read_h5ad('/home/ml4ig1/scprint/.lamindb/BljRloq1xjcxRNDpejzI.h5ad')
In [21]:
Copied!
Preprocessor()(adata.copy())
# cannot preprocess (first of all not raw counts and also doesn't have standardized values)
Preprocessor()(adata.copy())
# cannot preprocess (first of all not raw counts and also doesn't have standardized values)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[21], line 1 ----> 1 Preprocessor()(adata.copy()) 2 # cannot preprocess (first of all not raw counts and also doesn't have standardized values) File ~/Documents code/scDataLoader/scdataloader/preprocess.py:114, in Preprocessor.__call__(self, adata) 110 # check that it is a count 111 if ( 112 adata.X.astype(int).sum() != adata.X.sum() and not self.force_preprocess 113 ): # check if likely raw data --> 114 raise ValueError( 115 "Data is not raw counts, please check layers, find raw data, or bypass with force_preprocess" 116 ) 117 # please check layers 118 # if not available count drop 119 # # cleanup and dropping low expressed genes and unexpressed cells 120 prevsize = adata.shape[0] ValueError: Data is not raw counts, please check layers, find raw data, or bypass with force_preprocess
In [111]:
Copied!
# You need to use counts
adata.X = adata.layers['counts']
del adata.layers
# You need to use counts
adata.X = adata.layers['counts']
del adata.layers
In [112]:
Copied!
# you need to have ensembl gene ids
# # removing the other ones
var = adata.var.merge(ensembl.drop_duplicates('symbol').set_index('symbol', drop=False), left_index=True, right_index=True, how='inner').sort_values(by="ensembl_gene_id").set_index('ensembl_gene_id')
adata = adata[:, var['symbol']]
adata.var = var
# adding back the missing genes
unseen = set(ensembl.index) - set(adata.var.index)
# adding them to adata
emptyda = ad.AnnData(
csr_matrix((adata.shape[0], len(unseen)), dtype=np.float32),
var=pd.DataFrame(index=list(unseen)),
obs=pd.DataFrame(index=adata.obs.index),
)
adata = ad.concat([adata, emptyda], axis=1, join="outer", merge="only")
# do a validation function
adata.uns["unseen_genes"] = list(unseen)
adata = adata[:, adata.var.sort_index().index]
# Add at least the organism you are working with
adata.obs['organism_ontology_term_id'] = "NCBITaxon:9606"
adata
# you need to have ensembl gene ids
# # removing the other ones
var = adata.var.merge(ensembl.drop_duplicates('symbol').set_index('symbol', drop=False), left_index=True, right_index=True, how='inner').sort_values(by="ensembl_gene_id").set_index('ensembl_gene_id')
adata = adata[:, var['symbol']]
adata.var = var
# adding back the missing genes
unseen = set(ensembl.index) - set(adata.var.index)
# adding them to adata
emptyda = ad.AnnData(
csr_matrix((adata.shape[0], len(unseen)), dtype=np.float32),
var=pd.DataFrame(index=list(unseen)),
obs=pd.DataFrame(index=adata.obs.index),
)
adata = ad.concat([adata, emptyda], axis=1, join="outer", merge="only")
# do a validation function
adata.uns["unseen_genes"] = list(unseen)
adata = adata[:, adata.var.sort_index().index]
# Add at least the organism you are working with
adata.obs['organism_ontology_term_id'] = "NCBITaxon:9606"
adata
/tmp/ipykernel_21803/632984386.py:21: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual. adata.obs['organism_ontology_term_id'] = "NCBITaxon:9606"
Out[112]:
AnnData object with n_obs × n_vars = 16382 × 70116
obs: 'tech', 'celltype', 'size_factors', 'organism_ontology_term_id'
var: 'uid', 'symbol', 'stable_id', 'ncbi_gene_ids', 'biotype', 'description', 'synonyms', 'organism_id', 'public_source_id', 'created_at', 'updated_at', 'created_by_id', 'mt', 'ribo', 'hb', 'organism'
uns: 'unseen_genes'
In [113]:
Copied!
adataset = SimpleAnnDataset(adata, obs_to_output=['organism_ontology_term_id'])
col = Collator(organisms=["NCBITaxon:9606",], valid_genes=model.genes, how="most expr", max_len=1000,add_zero_genes=100)#mdataset.encoder['organism_ontology_term_id'])
dataloader = DataLoader(adataset, collate_fn=col, batch_size=64, num_workers=4, shuffle=False)
adataset = SimpleAnnDataset(adata, obs_to_output=['organism_ontology_term_id'])
col = Collator(organisms=["NCBITaxon:9606",], valid_genes=model.genes, how="most expr", max_len=1000,add_zero_genes=100)#mdataset.encoder['organism_ontology_term_id'])
dataloader = DataLoader(adataset, collate_fn=col, batch_size=64, num_workers=4, shuffle=False)
In [29]:
Copied!
#from scdataloader import AnnDataCollator
#from anndata.experimental import AnnLoader
#dataloader = AnnLoader([adata], collate_fn=AnnDataCollator(max_len=5000, organisms=["NCBITaxon:9606",], org_to_id={'NCBITaxon:9606': 'NCBITaxon:9606'}, how="most expr", logp1=True), batch_size=32, num_workers=4)
#from scdataloader import AnnDataCollator
#from anndata.experimental import AnnLoader
#dataloader = AnnLoader([adata], collate_fn=AnnDataCollator(max_len=5000, organisms=["NCBITaxon:9606",], org_to_id={'NCBITaxon:9606': 'NCBITaxon:9606'}, how="most expr", logp1=True), batch_size=32, num_workers=4)
In [114]:
Copied!
trainer = Trainer(precision=16)
trainer = Trainer(precision=16)
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/lightning/fabric/connector.py:554: UserWarning: 16 is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead! rank_zero_warn( INFO: Using 16bit Automatic Mixed Precision (AMP) 2024-02-27 14:54:08,590:INFO - Using 16bit Automatic Mixed Precision (AMP) INFO: Using 16bit Automatic Mixed Precision (AMP) 2024-02-27 14:54:08,590:INFO - Using 16bit Automatic Mixed Precision (AMP) INFO: GPU available: True (cuda), used: True 2024-02-27 14:54:08,622:INFO - GPU available: True (cuda), used: True INFO: TPU available: False, using: 0 TPU cores 2024-02-27 14:54:08,625:INFO - TPU available: False, using: 0 TPU cores INFO: IPU available: False, using: 0 IPUs 2024-02-27 14:54:08,628:INFO - IPU available: False, using: 0 IPUs INFO: HPU available: False, using: 0 HPUs 2024-02-27 14:54:08,630:INFO - HPU available: False, using: 0 HPUs
In [32]:
Copied!
model.labels, model.pred_embedding
model.labels, model.pred_embedding
Out[32]:
(['cell_type_ontology_term_id', 'disease_ontology_term_id', 'assay_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id', 'organism_ontology_term_id'], ['cell_type_ontology_term_id', 'disease_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'sex_ontology_term_id'])
In [115]:
Copied!
model.pred_embedding = [
"cell_type_ontology_term_id",
"disease_ontology_term_id",
"self_reported_ethnicity_ontology_term_id",
"sex_ontology_term_id",
]
model.pred_embedding = [
"cell_type_ontology_term_id",
"disease_ontology_term_id",
"self_reported_ethnicity_ontology_term_id",
"sex_ontology_term_id",
]
In [12]:
Copied!
adata.obs['tech'].value_counts()
adata.obs['tech'].value_counts()
Out[12]:
tech inDrop3 3605 smartseq2 2394 celseq2 2285 inDrop1 1937 inDrop2 1724 smarter 1492 inDrop4 1303 celseq 1004 fluidigmc1 638 Name: count, dtype: int64
In [13]:
Copied!
adata.obs.celltype.value_counts()
adata.obs.celltype.value_counts()
Out[13]:
celltype alpha 5493 beta 4169 ductal 2142 acinar 1669 delta 1055 gamma 699 activated_stellate 464 endothelial 313 quiescent_stellate 193 macrophage 79 mast 42 epsilon 32 schwann 25 t_cell 7 Name: count, dtype: int64
In [116]:
Copied!
predictions = trainer.predict(model, dataloader)
predictions = trainer.predict(model, dataloader)
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0] 2024-02-27 14:54:17,071:INFO - LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting: 0it [00:00, ?it/s]
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/lightning/pytorch/loops/prediction_loop.py:234: UserWarning: predict returned None if it was on purpose, ignore this warning...
self._warning_cache.warn("predict returned None if it was on purpose, ignore this warning...")
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/anndata/_core/anndata.py:183: ImplicitModificationWarning: Transforming to str index.
warnings.warn("Transforming to str index.", ImplicitModificationWarning)
WARNING: You’re trying to run this on 128 dimensions of `.X`, if you really want this, set `use_rep='X'`.
Falling back to preprocessing with `sc.pp.pca` and default params.
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/anndata/_core/anndata.py:522: FutureWarning: The dtype argument is deprecated and will be removed in late 2024. warnings.warn(
AnnData object with n_obs × n_vars = 16382 × 128
obs: 'pred_cell_type_ontology_term_id', 'pred_disease_ontology_term_id', 'pred_assay_ontology_term_id', 'pred_self_reported_ethnicity_ontology_term_id', 'pred_sex_ontology_term_id', 'pred_organism_ontology_term_id', 'leiden'
uns: 'neighbors', 'umap', 'leiden'
obsm: 'X_pca', 'X_umap'
obsp: 'distances', 'connectivities'
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:1251: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning color_vector = pd.Categorical(values.map(color_map)) /home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter( /home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:1251: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning color_vector = pd.Categorical(values.map(color_map)) /home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter( /home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:1251: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning color_vector = pd.Categorical(values.map(color_map)) /home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter( /home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:1251: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning color_vector = pd.Categorical(values.map(color_map)) /home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter( /home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:1251: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning color_vector = pd.Categorical(values.map(color_map)) /home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter( /home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:1251: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning color_vector = pd.Categorical(values.map(color_map)) /home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
couldn't log to tensorboard couldn't log to wandb
In [65]:
Copied!
predictions = trainer.predict(model, dataloader)
predictions = trainer.predict(model, dataloader)
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0] 2024-02-20 17:26:30,709:INFO - LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0] /home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:442: PossibleUserWarning: The dataloader, predict_dataloader, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 40 which is the number of cpus on this machine) in the `DataLoader` init to improve performance. rank_zero_warn(
Predicting: 0it [00:00, ?it/s]
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/anndata/_core/anndata.py:183: ImplicitModificationWarning: Transforming to str index.
warnings.warn("Transforming to str index.", ImplicitModificationWarning)
WARNING: You’re trying to run this on 128 dimensions of `.X`, if you really want this, set `use_rep='X'`.
Falling back to preprocessing with `sc.pp.pca` and default params.
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/anndata/_core/anndata.py:522: FutureWarning: The dtype argument is deprecated and will be removed in late 2024. warnings.warn(
AnnData object with n_obs × n_vars = 10112 × 128
obs: 'pred_cell_type_ontology_term_id', 'pred_disease_ontology_term_id', 'pred_assay_ontology_term_id', 'pred_self_reported_ethnicity_ontology_term_id', 'pred_sex_ontology_term_id', 'pred_organism_ontology_term_id', 'leiden'
uns: 'neighbors', 'umap', 'leiden'
obsm: 'X_pca', 'X_umap'
obsp: 'distances', 'connectivities'
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter( /home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter( /home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter( /home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter( /home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter( /home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
couldn't log to tensorboard couldn't log to wandb
In [103]:
Copied!
pred_adata = sc.read_h5ad(
(model.logger.save_dir if model.logger is not None else "/tmp")
+ "/step_"
+ str(model.global_step)
+ "_"
+""
+ ".h5ad"
)
pred_adata = sc.read_h5ad(
(model.logger.save_dir if model.logger is not None else "/tmp")
+ "/step_"
+ str(model.global_step)
+ "_"
+""
+ ".h5ad"
)
In [104]:
Copied!
pred_adata.obs
pred_adata.obs
Out[104]:
| pred_cell_type_ontology_term_id | pred_disease_ontology_term_id | pred_assay_ontology_term_id | pred_self_reported_ethnicity_ontology_term_id | pred_sex_ontology_term_id | pred_organism_ontology_term_id | leiden | |
|---|---|---|---|---|---|---|---|
| 0 | CL:0011020 | PATO:0000461 | EFO:0008722 | HANCESTRO:0005 | PATO:0000383 | NCBITaxon:9606 | 3 |
| 1 | CL:0002138 | PATO:0000461 | EFO:0030007 | HANCESTRO:0005 | PATO:0000383 | NCBITaxon:9606 | 6 |
| 2 | CL:0011020 | PATO:0000461 | EFO:0030007 | HANCESTRO:0005 | PATO:0000383 | NCBITaxon:9606 | 6 |
| 3 | CL:1000271 | PATO:0000461 | EFO:0030007 | HANCESTRO:0005 | PATO:0000383 | NCBITaxon:9606 | 21 |
| 4 | CL:0011020 | PATO:0000461 | EFO:0030059 | HANCESTRO:0005 | PATO:0000383 | NCBITaxon:9606 | 14 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 16377 | CL:1000271 | PATO:0000461 | EFO:0030007 | HANCESTRO:0005 | PATO:0000383 | NCBITaxon:9606 | 6 |
| 16378 | CL:0000128 | PATO:0000461 | EFO:0009899 | HANCESTRO:0005 | PATO:0000384 | NCBITaxon:9606 | 23 |
| 16379 | CL:0002138 | PATO:0000461 | EFO:0030007 | HANCESTRO:0005 | PATO:0000383 | NCBITaxon:9606 | 21 |
| 16380 | CL:0011020 | PATO:0000461 | EFO:0030059 | HANCESTRO:0005 | PATO:0000383 | NCBITaxon:9606 | 8 |
| 16381 | CL:0011020 | PATO:0000461 | EFO:0030007 | HANCESTRO:0005 | PATO:0000383 | NCBITaxon:9606 | 8 |
16382 rows × 7 columns
In [16]:
Copied!
pred_adata.obs
pred_adata.obs
Out[16]:
| pred_cell_type_ontology_term_id | pred_disease_ontology_term_id | pred_assay_ontology_term_id | pred_self_reported_ethnicity_ontology_term_id | pred_sex_ontology_term_id | pred_organism_ontology_term_id | leiden | |
|---|---|---|---|---|---|---|---|
| 0 | CL:0000171 | PATO:0000461 | EFO:0030002 | HANCESTRO:0027 | PATO:0000383 | NCBITaxon:9606 | 13 |
| 1 | CL:0000765 | PATO:0000461 | EFO:0030002 | HANCESTRO:0027 | PATO:0000384 | NCBITaxon:9606 | 1 |
| 2 | CL:0000171 | PATO:0000461 | EFO:0009899 | HANCESTRO:0005 | PATO:0000384 | NCBITaxon:9606 | 1 |
| 3 | CL:0000155 | PATO:0000461 | EFO:0030002 | HANCESTRO:0027 | PATO:0000384 | NCBITaxon:9606 | 1 |
| 4 | CL:1000271 | PATO:0000461 | EFO:0011025 | HANCESTRO:0027 | PATO:0000383 | NCBITaxon:9606 | 6 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 10011 | CL:0000155 | PATO:0000461 | EFO:0030002 | HANCESTRO:0027 | PATO:0000383 | NCBITaxon:9606 | 0 |
| 10012 | CL:1000343 | PATO:0000461 | EFO:0009899 | HANCESTRO:0005 | PATO:0000384 | NCBITaxon:9606 | 15 |
| 10013 | CL:0002064 | PATO:0000461 | EFO:0009899 | HANCESTRO:0005 | PATO:0000384 | NCBITaxon:9606 | 15 |
| 10014 | CL:0002064 | PATO:0000461 | EFO:0009899 | HANCESTRO:0005 | PATO:0000384 | NCBITaxon:9606 | 15 |
| 10015 | CL:0002064 | PATO:0000461 | EFO:0030002 | HANCESTRO:0027 | PATO:0000384 | NCBITaxon:9606 | 15 |
10016 rows × 7 columns
In [105]:
Copied!
expr = np.array(model.expr_pred[0])
expr = np.array(model.expr_pred[0])
In [106]:
Copied!
expr
expr
Out[106]:
array([[ 5.4566784, 4.6118417, 4.0348144, ..., 2.1848714,
3.9399052, 2.5233817],
[ 3.208337 , 2.5515165, 1.9921018, ..., 13.936381 ,
13.130431 , 6.312366 ],
[ 4.8954573, 5.087985 , 3.9134948, ..., 2.8555708,
4.554164 , 101.15237 ],
...,
[ 3.984802 , 4.126377 , 3.6083279, ..., 115.68789 ,
57.15791 , 16.6339 ],
[ 5.9352045, 4.8548284, 5.2390785, ..., 1.6146911,
36.804073 , 55.141655 ],
[ 5.808983 , 4.903611 , 3.8128808, ..., 7.2152896,
4.651722 , 2.080887 ]], dtype=float32)
In [68]:
Copied!
expr.shape
expr.shape
Out[68]:
array([[131.65997 , 29.549952 , 19.607767 , ..., 1.1995907 ,
7.7010403 , 0.73472875],
[352.7837 , 88.84994 , 61.664867 , ..., 8.065009 ,
1.6457222 , 1.1151838 ],
[179.33528 , 57.207157 , 43.435993 , ..., 3.1727197 ,
2.1062756 , 0.9593918 ],
...,
[223.08798 , 118.48114 , 54.776947 , ..., 1.082682 ,
1.2685677 , 2.6607585 ],
[130.19698 , 97.51303 , 44.644714 , ..., 3.5467129 ,
2.2846248 , 0.8338242 ],
[184.8851 , 42.39703 , 12.98097 , ..., 40.693264 ,
37.05164 , 2.698467 ]], dtype=float32)
In [107]:
Copied!
expr = np.array(model.expr_pred[0])
expr[np.random.binomial(1, p=np.array(torch.nn.functional.sigmoid(model.expr_pred[2].to(torch.float32)))).astype(bool)] = 0
#expr[expr<=0.5] = 0
#expr[(expr<=1) & (expr>0.5)] = 1
expr = np.array(model.expr_pred[0])
expr[np.random.binomial(1, p=np.array(torch.nn.functional.sigmoid(model.expr_pred[2].to(torch.float32)))).astype(bool)] = 0
#expr[expr<=0.5] = 0
#expr[(expr<=1) & (expr>0.5)] = 1
In [108]:
Copied!
for i in dataloader:
print(i["x"].shape)
break
for i in dataloader:
print(i["x"].shape)
break
torch.Size([64, 1100])
In [1]:
Copied!
size = 64
size = 64
In [56]:
Copied!
expr[:size].mean(), expr[:size].max(), (expr[:size]==0).sum()
expr[:size].mean(), expr[:size].max(), (expr[:size]==0).sum()
Out[56]:
(4.1190968, 310.3621, 126)
In [71]:
Copied!
expr[:size].mean(), expr[:size].max(), (expr[:size]==0).sum()
expr[:size].mean(), expr[:size].max(), (expr[:size]==0).sum()
Out[71]:
(3.9165297, 923.2358, 899)
In [72]:
Copied!
i['x'].mean(), i['x'].max(), (i['x']==0).sum()
i['x'].mean(), i['x'].max(), (i['x']==0).sum()
Out[72]:
(tensor(5.8350), tensor(1597.0111), tensor(11357))
In [109]:
Copied!
import matplotlib.pyplot as plt
import numpy as np
# Compute correlation coefficient
corr_coef = np.corrcoef(expr[:size], i['x'])[size:, :]
# Plot correlation coefficient
plt.figure(figsize=(10, 5))
plt.imshow(corr_coef, cmap='coolwarm', interpolation='none')
plt.colorbar()
plt.title('Correlation Coefficient of expr and i["x"]')
plt.show()
import matplotlib.pyplot as plt
import numpy as np
# Compute correlation coefficient
corr_coef = np.corrcoef(expr[:size], i['x'])[size:, :]
# Plot correlation coefficient
plt.figure(figsize=(10, 5))
plt.imshow(corr_coef, cmap='coolwarm', interpolation='none')
plt.colorbar()
plt.title('Correlation Coefficient of expr and i["x"]')
plt.show()
In [73]:
Copied!
import matplotlib.pyplot as plt
import numpy as np
# Compute correlation coefficient
corr_coef = np.corrcoef(expr[:size], i['x'])[size:, :]
# Plot correlation coefficient
plt.figure(figsize=(10, 5))
plt.imshow(corr_coef, cmap='coolwarm', interpolation='none')
plt.colorbar()
plt.title('Correlation Coefficient of expr and i["x"]')
plt.show()
import matplotlib.pyplot as plt
import numpy as np
# Compute correlation coefficient
corr_coef = np.corrcoef(expr[:size], i['x'])[size:, :]
# Plot correlation coefficient
plt.figure(figsize=(10, 5))
plt.imshow(corr_coef, cmap='coolwarm', interpolation='none')
plt.colorbar()
plt.title('Correlation Coefficient of expr and i["x"]')
plt.show()
In [63]:
Copied!
import matplotlib.pyplot as plt
import numpy as np
# Compute correlation coefficient
corr_coef = np.corrcoef(expr[:size], i['x'])[size:, :]
# Plot correlation coefficient
plt.figure(figsize=(10, 5))
plt.imshow(corr_coef, cmap='coolwarm', interpolation='none')
plt.colorbar()
plt.title('Correlation Coefficient of expr and i["x"]')
plt.show()
import matplotlib.pyplot as plt
import numpy as np
# Compute correlation coefficient
corr_coef = np.corrcoef(expr[:size], i['x'])[size:, :]
# Plot correlation coefficient
plt.figure(figsize=(10, 5))
plt.imshow(corr_coef, cmap='coolwarm', interpolation='none')
plt.colorbar()
plt.title('Correlation Coefficient of expr and i["x"]')
plt.show()
In [77]:
Copied!
pred_adata
pred_adata
Out[77]:
AnnData object with n_obs × n_vars = 10112 × 128
obs: 'pred_cell_type_ontology_term_id', 'pred_disease_ontology_term_id', 'pred_assay_ontology_term_id', 'pred_self_reported_ethnicity_ontology_term_id', 'pred_sex_ontology_term_id', 'pred_organism_ontology_term_id', 'leiden'
uns: 'leiden', 'neighbors', 'pred_assay_ontology_term_id_colors', 'pred_cell_type_ontology_term_id_colors', 'pred_disease_ontology_term_id_colors', 'pred_organism_ontology_term_id_colors', 'pred_self_reported_ethnicity_ontology_term_id_colors', 'pred_sex_ontology_term_id_colors', 'umap'
obsm: 'X_pca', 'X_umap'
obsp: 'connectivities', 'distances'
In [19]:
Copied!
adata
adata
Out[19]:
AnnData object with n_obs × n_vars = 16382 × 70116
obs: 'tech', 'celltype', 'size_factors', 'organism_ontology_term_id'
var: 'uid', 'symbol', 'stable_id', 'ncbi_gene_ids', 'biotype', 'description', 'synonyms', 'organism_id', 'public_source_id', 'created_at', 'updated_at', 'created_by_id', 'mt', 'ribo', 'hb', 'organism'
uns: 'unseen_genes'
In [24]:
Copied!
subadata = adata#[:10112] # 10016
subadata = adata#[:10112] # 10016
In [75]:
Copied!
sc.pp.pca(subadata)
sc.pp.pca(subadata)
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/preprocessing/_pca.py:229: ImplicitModificationWarning: Setting element `.obsm['X_pca']` of view, initializing view as actual. adata.obsm['X_pca'] = X_pca
In [25]:
Copied!
pred_adata.obs.index = subadata.obs.index
#subadata.obsm["Unintegrated"] = subadata.obsm["X_pca"]
subadata.obsm["X_umap"] = pred_adata.obsm["X_umap"]
subadata.obsm["scprint"] = pred_adata.X
pred_adata.obs.index = subadata.obs.index
subadata.obs = pd.concat([subadata.obs, pred_adata.obs], axis=1)
pred_adata.obs.index = subadata.obs.index
#subadata.obsm["Unintegrated"] = subadata.obsm["X_pca"]
subadata.obsm["X_umap"] = pred_adata.obsm["X_umap"]
subadata.obsm["scprint"] = pred_adata.X
pred_adata.obs.index = subadata.obs.index
subadata.obs = pd.concat([subadata.obs, pred_adata.obs], axis=1)
In [26]:
Copied!
subadata.obs.loc[subadata.obs.pred_cell_type_ontology_term_id=="CL:0000171"].celltype.value_counts() #type A
subadata.obs.loc[subadata.obs.pred_cell_type_ontology_term_id=="CL:0000171"].celltype.value_counts() #type A
Out[26]:
celltype acinar 0 activated_stellate 0 alpha 0 beta 0 delta 0 ductal 0 endothelial 0 epsilon 0 gamma 0 macrophage 0 mast 0 quiescent_stellate 0 schwann 0 t_cell 0 Name: count, dtype: int64
In [81]:
Copied!
subadata.obs.loc[subadata.obs.pred_cell_type_ontology_term_id=="CL:0000171"].celltype.value_counts() #type A
subadata.obs.loc[subadata.obs.pred_cell_type_ontology_term_id=="CL:0000171"].celltype.value_counts() #type A
Out[81]:
alpha 210 beta 169 acinar 41 gamma 22 ductal 21 delta 18 macrophage 2 activated_stellate 1 epsilon 1 quiescent_stellate 1 endothelial 0 mast 0 schwann 0 t_cell 0 Name: celltype, dtype: int64
In [33]:
Copied!
subadata.obs.loc[subadata.obs.pred_cell_type_ontology_term_id=="CL:0000171"].celltype.value_counts() #type A
subadata.obs.loc[subadata.obs.pred_cell_type_ontology_term_id=="CL:0000171"].celltype.value_counts() #type A
Out[33]:
alpha 221 beta 164 ductal 84 acinar 26 gamma 23 endothelial 16 delta 12 quiescent_stellate 5 activated_stellate 4 macrophage 3 epsilon 2 mast 0 schwann 0 t_cell 0 Name: celltype, dtype: int64
In [82]:
Copied!
subadata.obs.loc[subadata.obs.pred_cell_type_ontology_term_id=="CL:0000169"].celltype.value_counts() #typeB
subadata.obs.loc[subadata.obs.pred_cell_type_ontology_term_id=="CL:0000169"].celltype.value_counts() #typeB
Out[82]:
beta 23 delta 5 acinar 3 alpha 2 activated_stellate 0 ductal 0 endothelial 0 epsilon 0 gamma 0 macrophage 0 mast 0 quiescent_stellate 0 schwann 0 t_cell 0 Name: celltype, dtype: int64
In [32]:
Copied!
subadata.obs.loc[subadata.obs.pred_cell_type_ontology_term_id=="CL:0000169"].celltype.value_counts() #typeB
subadata.obs.loc[subadata.obs.pred_cell_type_ontology_term_id=="CL:0000169"].celltype.value_counts() #typeB
Out[32]:
beta 715 alpha 399 delta 142 gamma 37 acinar 24 ductal 7 activated_stellate 2 epsilon 2 endothelial 0 macrophage 0 mast 0 quiescent_stellate 0 schwann 0 t_cell 0 Name: celltype, dtype: int64
In [83]:
Copied!
subadata.obs.loc[subadata.obs.celltype=="alpha", ['pred_cell_type_ontology_term_id']].value_counts().head(10)
subadata.obs.loc[subadata.obs.celltype=="alpha", ['pred_cell_type_ontology_term_id']].value_counts().head(10)
Out[83]:
pred_cell_type_ontology_term_id CL:1000271 1096 CL:0009002 562 CL:0000158 383 CL:0000171 210 CL:0000155 195 CL:0000583 176 CL:0000670 127 CL:0000890 113 CL:0000765 77 CL:0002063 47 dtype: int64
In [ ]:
Copied!
subadata.obs.loc[subadata.obs.celltype=="acinar", ['pred_cell_type_ontology_term_id']].value_counts()
subadata.obs.loc[subadata.obs.celltype=="acinar", ['pred_cell_type_ontology_term_id']].value_counts()
In [24]:
Copied!
sc.pl.scatter(subadata, basis="umap", color=["celltype", "pred_cell_type_ontology_term_id", "tech", "pred_assay_ontology_term_id"])
sc.pl.scatter(subadata, basis="umap", color=["celltype", "pred_cell_type_ontology_term_id", "tech", "pred_assay_ontology_term_id"])
In [ ]:
Copied!
In [84]:
Copied!
bm = Benchmarker(
subadata,
batch_key="tech",
label_key="celltype",
embedding_obsm_keys=["Unintegrated", "scprint"],
n_jobs=6,
)
bm.benchmark()
bm = Benchmarker(
subadata,
batch_key="tech",
label_key="celltype",
embedding_obsm_keys=["Unintegrated", "scprint"],
n_jobs=6,
)
bm.benchmark()
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) Cell In[84], line 8 1 bm = Benchmarker( 2 subadata, 3 batch_key="tech", (...) 6 n_jobs=6, 7 ) ----> 8 bm.benchmark() File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scib_metrics/benchmark/_core.py:205, in Benchmarker.benchmark(self) 199 warnings.warn( 200 "The benchmark has already been run. Running it again will overwrite the previous results.", 201 UserWarning, 202 ) 204 if not self._prepared: --> 205 self.prepare() 207 num_metrics = sum( 208 [sum([v is not False for v in asdict(met_col)]) for met_col in self._metric_collection_dict.values()] 209 ) 211 for emb_key, ad in tqdm(self._emb_adatas.items(), desc="Embeddings", position=0, colour="green"): File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scib_metrics/benchmark/_core.py:174, in Benchmarker.prepare(self, neighbor_computer) 170 # Compute PCA 171 if self._pre_integrated_embedding_obsm_key is None: 172 # This is how scib does it 173 # https://github.com/theislab/scib/blob/896f689e5fe8c57502cb012af06bed1a9b2b61d2/scib/metrics/pcr.py#L197 --> 174 sc.tl.pca(self._adata, use_highly_variable=False) 175 self._pre_integrated_embedding_obsm_key = "X_pca" 177 for emb_key in self._embedding_obsm_keys: File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/preprocessing/_pca.py:200, in pca(data, n_comps, zero_center, svd_solver, random_state, return_info, use_highly_variable, dtype, copy, chunked, chunk_size) 194 if svd_solver not in {'lobpcg', 'arpack'}: 195 raise ValueError( 196 'svd_solver: {svd_solver} can not be used with sparse input.\n' 197 'Use "arpack" (the default) or "lobpcg" instead.' 198 ) --> 200 output = _pca_with_sparse( 201 X, n_comps, solver=svd_solver, random_state=random_state 202 ) 203 # this is just a wrapper for the results 204 X_pca = output['X_pca'] File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/preprocessing/_pca.py:303, in _pca_with_sparse(X, npcs, solver, mu, random_state) 292 return XHmat(x) - mhmat(ones(x)) 294 XL = LinearOperator( 295 matvec=matvec, 296 dtype=X.dtype, (...) 300 rmatmat=rmatmat, 301 ) --> 303 u, s, v = svds(XL, solver=solver, k=npcs, v0=random_init) 304 u, v = svd_flip(u, v) 305 idx = np.argsort(-s) File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/linalg/_eigen/_svds.py:525, in svds(A, k, ncv, tol, which, v0, maxiter, return_singular_vectors, solver, random_state, options) 523 if v0 is None: 524 v0 = random_state.standard_normal(size=(min(A.shape),)) --> 525 _, eigvec = eigsh(XH_X, k=k, tol=tol ** 2, maxiter=maxiter, 526 ncv=ncv, which=which, v0=v0) 527 # arpack do not guarantee exactly orthonormal eigenvectors 528 # for clustered eigenvalues, especially in complex arithmetic 529 eigvec, _ = np.linalg.qr(eigvec) File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/linalg/_eigen/arpack/arpack.py:1697, in eigsh(A, k, M, sigma, which, v0, ncv, maxiter, tol, return_eigenvectors, Minv, OPinv, mode) 1695 with _ARPACK_LOCK: 1696 while not params.converged: -> 1697 params.iterate() 1699 return params.extract(return_eigenvectors) File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/linalg/_eigen/arpack/arpack.py:549, in _SymmetricArpackParams.iterate(self) 546 elif self.ido == 1: 547 # compute y = Op*x 548 if self.mode == 1: --> 549 self.workd[yslice] = self.OP(self.workd[xslice]) 550 elif self.mode == 2: 551 self.workd[xslice] = self.OPb(self.workd[xslice]) File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/linalg/_interface.py:234, in LinearOperator.matvec(self, x) 231 if x.shape != (N,) and x.shape != (N,1): 232 raise ValueError('dimension mismatch') --> 234 y = self._matvec(x) 236 if isinstance(x, np.matrix): 237 y = asmatrix(y) File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/linalg/_interface.py:591, in _CustomLinearOperator._matvec(self, x) 590 def _matvec(self, x): --> 591 return self.__matvec_impl(x) File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/linalg/_eigen/_svds.py:469, in svds.<locals>.matvec_XH_X(x) 468 def matvec_XH_X(x): --> 469 return XH_dot(X_dot(x)) File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/linalg/_interface.py:234, in LinearOperator.matvec(self, x) 231 if x.shape != (N,) and x.shape != (N,1): 232 raise ValueError('dimension mismatch') --> 234 y = self._matvec(x) 236 if isinstance(x, np.matrix): 237 y = asmatrix(y) File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/linalg/_interface.py:591, in _CustomLinearOperator._matvec(self, x) 590 def _matvec(self, x): --> 591 return self.__matvec_impl(x) File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scanpy/preprocessing/_pca.py:283, in _pca_with_sparse.<locals>.matvec(x) 282 def matvec(x): --> 283 return Xdot(x) - mdot(x) File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/_base.py:411, in _spbase.dot(self, other) 409 return self * other 410 else: --> 411 return self @ other File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/_base.py:624, in _spbase.__matmul__(self, other) 621 if isscalarlike(other): 622 raise ValueError("Scalar operands are not allowed, " 623 "use '*' instead") --> 624 return self._mul_dispatch(other) File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/_base.py:522, in _spbase._mul_dispatch(self, other) 519 if other.__class__ is np.ndarray: 520 # Fast path for the most common case 521 if other.shape == (N,): --> 522 return self._mul_vector(other) 523 elif other.shape == (N, 1): 524 return self._mul_vector(other.ravel()).reshape(M, 1) File ~/miniconda3/envs/scprint/lib/python3.10/site-packages/scipy/sparse/_compressed.py:488, in _cs_matrix._mul_vector(self, other) 486 # csr_matvec or csc_matvec 487 fn = getattr(_sparsetools, self.format + '_matvec') --> 488 fn(M, N, self.indptr, self.indices, self.data, other, result) 490 return result KeyboardInterrupt:
In [180]:
Copied!
bm.plot_results_table(min_max_scale=False)
bm.plot_results_table(min_max_scale=False)
/home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/pandas/core/dtypes/cast.py:1641: DeprecationWarning: np.find_common_type is deprecated. Please use `np.result_type` or `np.promote_types`. See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information. (Deprecated NumPy 1.25) return np.find_common_type(types, []) /home/ml4ig1/miniconda3/envs/scprint/lib/python3.10/site-packages/numpy/lib/nanfunctions.py:1215: RuntimeWarning: Mean of empty slice return np.nanmean(a, axis, out=out, keepdims=keepdims)
Out[180]:
<plottable.table.Table at 0x7f4ee756d030>
In [ ]:
Copied!
In [ ]:
Copied!
In [ ]:
Copied!
adata.X.iloc[0].to_array()
adata.X.iloc[0].to_array()
In [ ]:
Copied!
model
model
results from running it on most famous methods¶

In [ ]:
Copied!
## load a random dataset
## load a random dataset
In [ ]:
Copied!
## do scGPT finetuning task (reusing the helper functions I have) and the notebook that they provide
## do scGPT finetuning task (reusing the helper functions I have) and the notebook that they provide
In [ ]:
Copied!
## create a task/function for scPrint
## create a task/function for scPrint
In [ ]:
Copied!
## try to do an embedding from the regulon / single cell type matrix outputted by scenic
## try to do an embedding from the regulon / single cell type matrix outputted by scenic
In [ ]:
Copied!
## push it to a function on BenGRN
## push it to a function on BenGRN
In [ ]:
Copied!